Impact of Dietary Shifts on Gut Microbiome Dynamics

Multivariate Insights Using R

R for Bio Data Analysis

Group 16: Eric Torres, Lucia de Lamadrid, Konstantina Gkopi, Elena Iriondo and Jorge Santiago

2024-12-03

Introduction

Objective and Main Research Questions

Objective:

-

Research Questions:

1.

2.

Data Loading

  • MicrobiomeWithMetadata.csv (Main dataset):
    Contains the relative abundance of OTUs analyzed per observation, along with the conditions of each sample (e.g., diet, sex, etc.).

    library(tidyverse) 
    # Load the main dataset: per-sample conditions plus one column per OTU
    metadata_df <- read_tsv("../data/01_data_metadata.tsv")

    # Preview the first rows to verify that the columns were parsed correctly
    head(metadata_df)
    # A tibble: 6 × 6,701
       Diet Source Donor CollectionMet   Sex     OTU0     OTU1     OTU2     OTU3
      <dbl>  <dbl> <dbl>         <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
    1     0      0     0             0     0 1.56e-11 4.72e-11 1.23e-11 4.52e-11
    2     0      1     0             0     0 2.36e-11 9.53e-11 3.33e-11 2.67e-11
    3     0      2     0             1     0 6.77e-11 3.68e-11 8.02e-11 5.49e-11
    4     0      2     0             0     0 5.52e-11 9.89e-11 4.58e-11 3.54e-11
    5     0      3     0             0     0 5.24e-11 6.34e-11 2.35e-11 7.47e-11
    6     0      4     0             1     0 7.67e-11 7.22e-11 5.41e-11 1.20e-11
    # ℹ 6,692 more variables: OTU4 <dbl>, OTU5 <dbl>, OTU6 <dbl>, OTU7 <dbl>,
    #   OTU8 <dbl>, OTU9 <dbl>, OTU10 <dbl>, OTU11 <dbl>, OTU12 <dbl>, OTU13 <dbl>,
    #   OTU14 <dbl>, OTU15 <dbl>, OTU16 <dbl>, OTU17 <dbl>, OTU18 <dbl>,
    #   OTU19 <dbl>, OTU20 <dbl>, OTU21 <dbl>, OTU22 <dbl>, OTU23 <dbl>,
    #   OTU24 <dbl>, OTU25 <dbl>, OTU26 <dbl>, OTU27 <dbl>, OTU28 <dbl>,
    #   OTU29 <dbl>, OTU30 <dbl>, OTU31 <dbl>, OTU32 <dbl>, OTU33 <dbl>,
    #   OTU34 <dbl>, OTU35 <dbl>, OTU36 <dbl>, OTU37 <dbl>, OTU38 <dbl>, …
  • MicrobiomeMetadataDictionary.csv (Complementary):
    Provides the equivalence between numerical values and their corresponding sample conditions.

  • MicrobiomeOTUtaxonomy.csv (Complementary):
    Contains the taxonomic classification of each OTU analyzed.

  • Source:
    Explore Microbiome Dataset
    Curated version of the microbiome dataset from Turnbaugh et al. (original publication available in accompanying files).

Materials and Methods

Data Tidying and Filtering

  • Added a SampleID column to uniquely identify each sample, and transformed the dataset from wide to long format for easier analysis.
# Number the samples: each row is one sample, so its row index becomes the
# SampleID, which is then moved to the first column
metadata_df <- metadata_df |>
  mutate(SampleID = seq_len(n())) |>
  select(SampleID, everything())

# Reshape from wide to long: every OTU column becomes one row per sample, with
# the column name stored in `OTU` and its value in `rel_abundance`
metadata_df_long <- pivot_longer(
  metadata_df,
  cols = starts_with("OTU"),
  names_to = "OTU",
  values_to = "rel_abundance"
)
head(metadata_df_long)
# A tibble: 6 × 8
  SampleID  Diet Source Donor CollectionMet   Sex OTU   rel_abundance
     <int> <dbl>  <dbl> <dbl>         <dbl> <dbl> <chr>         <dbl>
1        1     0      0     0             0     0 OTU0       1.56e-11
2        1     0      0     0             0     0 OTU1       4.72e-11
3        1     0      0     0             0     0 OTU2       1.23e-11
4        1     0      0     0             0     0 OTU3       4.52e-11
5        1     0      0     0             0     0 OTU4       2.72e-11
6        1     0      0     0             0     0 OTU5       2.71e-11
  • Filtering Low-Abundance OTUs: Ranked the OTUs by mean relative abundance and retained those that together account for up to 95% of the cumulative abundance, discarding negligible contributors. Additionally, removed individual observations whose relative abundance is below 1e-6.

  • Replace the numeric codes with descriptive labels.

# Rank the OTUs by mean relative abundance (highest first) and compute, for
# each one, the running share of the total mean abundance
cumulative_otus <- metadata_df_long |>
  group_by(OTU) |>
  summarise(mean_abundance = mean(rel_abundance)) |>
  arrange(desc(mean_abundance)) |>
  mutate(
    cumulative_abundance = cumsum(mean_abundance) / sum(mean_abundance)
  )

# Names of the top-ranked OTUs whose running share does not exceed 95%
otus_to_keep <- cumulative_otus |>
  filter(cumulative_abundance <= 0.95) |>
  pull(OTU)

# Keep only the observations that belong to the retained OTUs
filtered_metadata <- filter(metadata_df_long, OTU %in% otus_to_keep)
dim(filtered_metadata)
[1] 264600      8
# Minimum relative abundance an observation needs in order to be retained
abundance_threshold <- 1e-6
# Discard every sample/OTU observation that falls below the threshold
filtered_metadata_stricter <- filter(
  filtered_metadata,
  rel_abundance >= abundance_threshold
)
dim(filtered_metadata_stricter)
[1] 65938     8
# Replace the numeric codes of every condition column with descriptive labels.
# A code that is not listed below becomes NA, because no default is given.
filtered_metadata_stricter_label <- filtered_metadata_stricter |>
  mutate(
    Diet = case_when(
      Diet == 0 ~ "LFPP",
      Diet == 1 ~ "Western",
      Diet == 2 ~ "CARBR",
      Diet == 3 ~ "FATR",
      Diet == 4 ~ "Suckling",
      Diet == 5 ~ "Human"
    ),
    Source = case_when(
      Source == 0 ~ "Cecum1",
      Source == 1 ~ "Cecum2",
      Source == 2 ~ "Colon1",
      Source == 3 ~ "Colon2",
      Source == 4 ~ "Feces",
      Source == 5 ~ "SI1",
      Source == 6 ~ "SI13",
      Source == 7 ~ "SI15",
      Source == 8 ~ "SI2",
      Source == 9 ~ "SI5",
      Source == 10 ~ "SI9",
      Source == 11 ~ "Stomach",
      Source == 12 ~ "Cecum"
    ),
    Donor = case_when(
      Donor == 0 ~ "HMouseLFPP",
      Donor == 1 ~ "CONVR",
      Donor == 2 ~ "Human",
      Donor == 3 ~ "Fresh",
      Donor == 4 ~ "Frozen",
      Donor == 5 ~ "HMouseWestern",
      Donor == 6 ~ "CONVD"
    ),
    CollectionMet = case_when(
      CollectionMet == 0 ~ "Contents",
      CollectionMet == 1 ~ "Scraping"
    ),
    Sex = case_when(
      Sex == 0 ~ "Male",
      Sex == 1 ~ "Female"
    )
  )

Our tidy data set…ready to be augmented!

We will use the OTUs taxonomy file to add columns with the names of phylum and class for each OTU, using left_join.

# Load the filtered, labelled long-format table. Its first column has no
# header, so readr names it `...1` (apparently a saved row index - confirm).
clean_df <- read_tsv("../data/02_metadata_long_filtered_label.tsv")
head(clean_df)
# A tibble: 6 × 9
   ...1 SampleID Diet  Source Donor      CollectionMet Sex   OTU   rel_abundance
  <dbl>    <dbl> <chr> <chr>  <chr>      <chr>         <chr> <chr>         <dbl>
1     1        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU9       0.00257 
2     2        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU77      0.00128 
3     3        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU1…      0.00449 
4     4        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU1…      0.000641
5     5        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU3…      0.000641
6     6        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU3…      0.00257 
# Load the OTU taxonomy table. Despite the .tsv extension, the file is parsed
# as comma-separated; the header row supplies the column names.
otu_df_original <- read.table(
  "../data/01_data_otu.tsv",
  sep = ",",
  header = TRUE
)
head(otu_df_original)
  OTU.ID  Kingdom        Phylum         Class           Order
1   OTU0 Bacteria                                            
2   OTU1 Bacteria    Firmicutes    Clostridia   Clostridiales
3   OTU2 Bacteria    Firmicutes       Bacilli Lactobacillales
4   OTU3 Bacteria Bacteroidetes Bacteroidetes   Bacteroidales
5   OTU4 Bacteria Bacteroidetes                              
6   OTU5 Bacteria    Firmicutes    Clostridia   Clostridiales
              Family           Genus X X.1
1                                         
2    Ruminococcaceae                      
3    Enterococcaceae    Enterococcus      
4 Porphyromonadaceae Parabacteroides      
5                                         
6                                         
# Augment every observation with the taxonomy of its OTU, matching `OTU` in the
# abundance table to `OTU.ID` in the taxonomy table, then place Phylum and
# Class right after the OTU column.
# NOTE(review): `otu_df_modified` is not defined in the visible part of this
# document; presumably it is derived from `otu_df_original` - confirm.
clean_df_augm <- left_join(
  clean_df,
  otu_df_modified,
  by = join_by(OTU == OTU.ID)
) |>
  relocate(Phylum, Class, .after = OTU)
head(clean_df_augm)
# A tibble: 6 × 11
   ...1 SampleID Diet  Source Donor      CollectionMet Sex   OTU    Phylum Class
  <dbl>    <dbl> <chr> <chr>  <chr>      <chr>         <chr> <chr>  <chr>  <chr>
1     1        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU9   Firmi… Clos…
2     2        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU77  Firmi… Clos…
3     3        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU155 Firmi… Clos…
4     4        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU170 Firmi… Clos…
5     5        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU336 Firmi… Clos…
6     6        1 LFPP  Cecum1 HMouseLFPP Contents      Male  OTU370 Bacte… Bact…
# ℹ 1 more variable: rel_abundance <dbl>

Results

Microbiota composition at the phylum level across different:

  • sources and diet types

  • diet and donor combination

05

Principal Component Analysis on Phylum-Level Aggregated Microbiome Data

# Sum the relative abundances of all OTUs of the same phylum within each sample
aggregated_data <- clean_df_augm |>
  # Keep only the samples whose donor is "Fresh" (first generation of humanised
  # mice, per the authors). No diet filter is applied because, per the authors,
  # these mice only follow the LFPP and Western diets.
  filter(Donor == "Fresh") |>
  group_by(SampleID, Phylum, Diet) |>
  summarize(rel_abundance = sum(rel_abundance), .groups = "drop")

# Reshape to one row per sample and one column per phylum, ready for PCA
aggregated_wide <- aggregated_data |>
  pivot_wider(
    names_from = Phylum,
    values_from = rel_abundance,
    # A phylum with no rows for a sample was presumably removed by the upstream
    # abundance filters; treat it as zero abundance instead of NA so that PCA
    # receives a complete numeric matrix
    values_fill = 0
  ) |>
  # PCA needs numeric input, so Diet is encoded as 0 (LFPP) / 1 (Western);
  # any other diet label would become NA
  mutate(Diet = case_when(Diet == "LFPP" ~ 0, Diet == "Western" ~ 1))

# Inspect the aggregated data
head(aggregated_wide)
# A tibble: 6 × 8
  SampleID  Diet Actinobacteria Bacteroidetes Firmicutes Proteobacteria
     <dbl> <dbl>          <dbl>         <dbl>      <dbl>          <dbl>
1      339     1        0.00316        0.463       0.470        0.00671
2      340     1        0.0180         0.106       0.811        0.00138
3      341     1        0.0121         0.0443      0.873        0.00792
4      342     1        0.00531        0.145       0.786        0.0135 
5      343     1        0.00801        0.607       0.300        0.0300 
6      344     1        0.631          0.199       0.141        0.00467
# ℹ 2 more variables: Unclassified <dbl>, Verrucomicrobia <dbl>

Principal Component Analysis on Phylum-Level Aggregated Microbiome Data

Analysis of Microbiome Clusters by Donor Groups Using Hierarchical Clustering

# Hierarchical clustering of the samples based on their OTU profiles.
# Steps: load the wide table, standardise the OTU columns, compute pairwise
# Euclidean distances, cluster with Ward's method, cut the tree into three
# groups and attach the group of every sample to its metadata.

# Read the filtered, labelled metadata in wide format
filtered_metadata_wider <- read_tsv("../data/02_metadata_wide_filtered_label.tsv")

# Keep only the OTU abundance columns
otu_data <- filtered_metadata_wider |>
  select(starts_with("OTU"))

# Centre and scale every OTU column; scale() returns a matrix, so convert it
# back to a tibble for tidyverse compatibility
otu_data_scaled <- otu_data |>
  scale() |>
  as_tibble()

# Scaled OTU data together with the Donor of each sample (rows are in the same
# order as in filtered_metadata_wider)
otu_data_with_metadata <- otu_data_scaled |>
  mutate(Donor = filtered_metadata_wider$Donor)

# Pairwise distances between samples (dist() defaults to Euclidean)
dist_matrix <- otu_data_scaled |>
  dist()

# Agglomerative clustering with Ward's minimum-variance criterion
hclust_result <- hclust(dist_matrix, method = "ward.D2")

# Cut the dendrogram into 3 clusters and store the assignments as a one-column
# tibble. as_tibble_col() replaces as_tibble() on an atomic vector, which is
# superseded since tibble 3.0.0.
cluster_labels <- cutree(hclust_result, k = 3) |>
  unname() |>
  as_tibble_col(column_name = "Cluster")

# Attach the cluster of every sample to its metadata (same row order)
clustered_metadata <- filtered_metadata_wider |>
  mutate(Cluster = cluster_labels$Cluster)

07

Discussion

……